---
title: "05 Propensity Score Weighting Data-Children"
author: "Ye Shen, Radhika Tampi, Raj Vatsa"
date: "11/27/2021"
output: pdf_document
---
Balancing weights were created using household wealth index, a time-varying confounding variable that substantially differed between intervention and control in 2014 and 2016. 
```{r, results = 'hide', message = FALSE}
knitr::opts_chunk$set("warning"=FALSE)
library(tidyverse)
library(survey)
library(WeightIt)
library(survey)
library(questionr)#use survey design in ggplot
library(patchwork)
library(cobalt)
library(gt)
```
## Read Cleaned Data and Prepare for Balance Weighting
Create a categorical variable of group+year combo for each dataset to apply multinomial weighting using method proposed by Stuart et al 2014 for integrating DiD and weighting
```{r}
children_alldisease_did<-readRDS("data/cleaned_children_did_alldis.rds")%>%
                          mutate(group_year=as.factor(case_when(
                            group==1&year==0 ~ 1, #Treatment in pre-period: Group 1 in Stuart et al 2014
                            group==1&year==1 ~ 2, #Treatment in post-period: Group 2 in Stuart et al 2014
                            group==0&year==0 ~ 3, #Control in pre-period: Group 3 in Stuart et al 2014
                            group==0&year==1 ~ 4 #Control in post-period: Group 4 in Stuart et al 2014
                          )))


 svy_children_alldisease_did <- svydesign(
    id = ~cluster, strata = ~group, weights = ~wmweight,
    data = children_alldisease_did
  )
 

 
```
## Before balance weighting
```{r}
svy_children_14<-subset(svy_children_alldisease_did, year==0)
svy_children_16<-subset(svy_children_alldisease_did, year==1)

```

### Distributions of wealth index score between treatment and control in 2014 and 2016
```{r,message=FALSE}
child_wealth_2014plot<-ggsurvey(design = svy_children_14, mapping = aes(x=as.factor(group) , y=menage_score_bien_etre))+
  geom_violin()+
  scale_y_continuous(name="Wealth Index Score", limits=c(-1,5),breaks=seq(-1,5,1))+
  scale_x_discrete(name="", labels=c("0"="Control", "1"="Treatment"))+
  theme_bw()+
  labs(title="2014")
child_wealth_2016plot<-ggsurvey(design = svy_children_16, mapping = aes(x=as.factor(group) , y=menage_score_bien_etre))+
  geom_violin()+
   scale_y_continuous(name="Wealth Index Score", limits=c(-1,5),breaks=seq(-1,5,1))+
  scale_x_discrete(name="", labels=c("0"="Control", "1"="Treatment"))+
  theme_bw()+
  labs(title="2016")
child_wealth_2014plot+child_wealth_2016plot +plot_annotation(tag_levels=c("A"),title="Fig X1. Distributions of wealth index score between treatment and control \n in pre and post- periods among children with any illnesses")
```
### Household Wealth Index Imbalance QQPlot 
```{r,message=FALSE}
gg_qqplotimbalance<-function(a, b, quantiles = seq(0, 1, 0.01)){

  
  ggplot(mapping = aes(x = quantile(a, quantiles), 
                       y = quantile(b, quantiles))) + 
    geom_point() +
    geom_abline(aes(slope = 1, intercept = 0), linetype = 2) 
    
}

# svy_children_16_trt<-subset(svy_children_16, group==1)
# svy_children_16_con<-subset(svy_children_16, group==0)

child_wealth_2016qqplot<-gg_qqplotimbalance(subset(svy_children_16, group==1)$variables$menage_score_bien_etre,subset(svy_children_16, group==0)$variables$menage_score_bien_etre)+
      labs(x = "Treatment Wealth Index Score",
         y = "Control Wealth Index Score",
         title="2016")+
          scale_y_continuous(limits = c(-1,5))+
          scale_x_continuous(limits = c(-1,5))+
          theme_classic()
          
                              
child_wealth_2014qqplot<-gg_qqplotimbalance(subset(svy_children_14, group==1)$variables$menage_score_bien_etre,subset(svy_children_14, group==0)$variables$menage_score_bien_etre)+
      labs(x = "Treatment Wealth Index Score",
         y = "Control Wealth Index Score",
         title="2014")+
          scale_y_continuous(limits = c(-1,5))+
          scale_x_continuous(limits = c(-1,5))+
          theme_classic()

child_wealth_2014qqplot+child_wealth_2016qqplot +plot_annotation(tag_levels=c("A"),title="Fig X2. Imbalance qqplot on wealth index score between treatment and control \n in pre and post- periods among children with any illnesses")
```
### Combine the two imbalance plots into one figure
```{r}
(child_wealth_2014plot+child_wealth_2016plot) / (child_wealth_2014qqplot+child_wealth_2016qqplot)+plot_annotation(tag_levels=c("A"),title="Fig 1. Imbalance in household wealth index score between treatment and control \n in pre and post- periods among households with children with any illness", caption="Note: Panels A & B are violin plots displaying the wealth index score distributions for each group. \n Panels C & D are qq plots contrasting distributions in treatment against control.")&
    theme(plot.tag = element_text(size = 10), text = element_text( face="bold"))

ggsave("table_fig/paper/fig1_imbal_wealth_child.png",width = 200,height=200, units="mm")
```


## Balance Weighting

### Optimization-Based Weights
Method with best performance re: smallest coefficients of variance close to 0 and large effective sample sizes. We used this method to create weights for the extension analysis, but showed how the second best method and the defaul method performed below.
```{r}
Weighted_children_opt<-weightit(group_year~menage_score_bien_etre,data=children_alldisease_did, s.weights = children_alldisease_did$wmweight, method="optweight")
summary(Weighted_children_opt)


cobalt::bal.tab(Weighted_children_opt)


bal.plot(Weighted_children_opt,"menage_score_bien_etre", which="both")+
  labs(title="Fig 3. Balance among 4 treatment-by-year groups before and after weighting \n on wealth index score among households with children with any illnesses", x="Wealth Index Score", fill="Treatment and Period")+
  scale_fill_discrete(labels=c("1"="Treatment Preperiod","2"="Treatment Postperiod","3"="Control Preperiod","4"="Control Postperiod"))+
  theme(plot.title = element_text(size=14, face="bold"))
ggsave("table_fig/paper/fig3_weighting_balance_child.png")

```
The greatest mean difference between the two groups after adjusting using Optimization-Based Weights is zero.


### Entropy Balancing 
Close second best performance re: smallest coefficients of variance close to 0 and large effective sample sizes

Note: This method seems to be the same as "ebcw" Empirical Balancing Calibration Weights
```{r}
Weighted_children_ebal<-weightit(group_year~menage_score_bien_etre,data=children_alldisease_did, s.weights = children_alldisease_did$wmweight, method="ebal")
summary(Weighted_children_ebal)
cobalt::bal.tab(Weighted_children_ebal)


bal.plot(Weighted_children_ebal,"menage_score_bien_etre", which="both")+
  labs(title="FigX4. Balance among 4 treatment-by-year groups before and after weighting \n on wealth index score for children with any illnesses")+
  scale_fill_discrete(labels=c("1"="Treatment Preperiod","2"="Treatment Postperiod","3"="Control Preperiod","4"="Control Postperiod"))

```
### Multinomial regression PS (default)
```{r}
Weighted_children_ps<-weightit(group_year~menage_score_bien_etre,data=children_alldisease_did, s.weights = children_alldisease_did$wmweight, method="ps")
summary(Weighted_children_ps)
cobalt::bal.tab(Weighted_children_ps)


bal.plot(Weighted_children_ps,"menage_score_bien_etre", which="both")+
  labs(title="FigX4. Balance among 4 treatment-by-year groups before and after weighting \n on wealth index score for children with any illnesses", x="Wealth Index Score")+
  scale_fill_discrete(labels=c("1"="Treatment Preperiod","2"="Treatment Postperiod","3"="Control Preperiod","4"="Control Postperiod"))
```



## After balance weighting using optweight
### Assign survey design to did dataset using the new weights
```{r}
children_alldisease_did$balwt<-Weighted_children_opt$weights

svy_children_alldisease_did_wted <- svydesign(
    id = ~cluster, strata = ~group, weights = ~balwt,
    data = children_alldisease_did
  )

```
### Subset to each year for weighted summary stats in table 2
```{r}
#subset did survey data to 2014 and 2016 for summary stats
  svy_children_alldisease_2014 <- subset(svy_children_alldisease_did_wted,year==0)
  svy_children_alldisease_2016 <- subset(svy_children_alldisease_did_wted,year==1)

```

### Weighted DiD regression adjusting for wealth index score
  * Outcome: Access to any public_level care (Visit to a PHF or CHW)
```{r}

#Adjusted and weighted DID regression on any visIt to a PHF or CHW

  did_children_wtd <- svyglm(ALL.public_level~year*group+menage_score_bien_etre, design = svy_children_alldisease_did_wted, family = gaussian)
#Unadjusted and weighted DID regression on any visIt to a PHF or CHW
did_children_wtd_unadj<-svyglm(ALL.public_level~year*group, design = svy_children_alldisease_did_wted, family = gaussian)
#Unadjusted and unweighted DID regression on any visIt to a PHF or CHW
did_children_unwtd_unadj<-svyglm(ALL.public_level~year*group, design = svy_children_alldisease_did, family = gaussian)
```

## Table for children with any illnesses
 * Outcome: Access to any public_level care (Visit to a PHF or CHW)
### Prepare Table 2 for children with any illnesses 

```{r}

table3_children <- 
  as.data.frame(array(data = NA, dim = c(2, 22)))
# "Indicator","Study group", "Denominator", "Numerator", "Percent (SE)", "Difference 
# (p-value)","Denominator", "Numerator", "Percent (SE)", "Difference (p-value)","Percent 
# relative change", "Percent absolute change", "Difference in differences (p-value)"
colnames(table3_children) <- 
  c("indicator", "group", "d_2014", "n_2014", "m_2014", "se_2014", "dif_2014", "p-val_2014", 
    "d_2016", "n_2016", "m_2016", "se_2016", "dif_2016", "p-val_2016", "trends_rel", 
    "trends_abs", "dif.in.dif(coef)_unadj_unwt", "p-val_did_unadj_unwt","dif.in.dif(coef)_unadj_wt", "p-val_did_unadj_wt","dif.in.dif(coef)_adj_wt", "p-val_did_adj_wt")
table3_children$indicator <- "ALL.public_level"

# Any visit (Access)
## Summary stats for 2014
table3_children[rev(c(1,2)), 2] <- c(0,1)
table3_children[rev(c(1,2)), 3] <- 
    round(svytable( ~group, design = svy_children_alldisease_2014)) # 'd_2014'
table3_children[rev(c(1,2)), 4] <- 
    round(coef(svyby(~ALL.public_level, by = ~group, design = svy_children_alldisease_2014, FUN = svytotal, keep.var = T, na.rm = T))) # 'n_2014'
table3_children[rev(c(1,2)), 5] <- 
    round(coef(svyby(~ALL.public_level, by = ~group, design = svy_children_alldisease_2014, FUN = svymean,keep.var = T, na.rm = T))*100,2) # 'm_2014'
table3_children[rev(c(1,2)), 6] <- 
    round(SE(svyby(~ALL.public_level, by = ~group, design = svy_children_alldisease_2014, FUN = svymean,keep.var = T, na.rm = T))*100,2) # 'se_2014'
table3_children[rev(c(1,2)), 7] <- table3_children[1, 5] - table3_children[2, 5]# 'dif_2014'

### Estimate p-value of the 2014 difference with a Chi2 test
  if (table3_children[1, 4] == 0 & 
      table3_children[2, 4] == 0) {
    table3_children[2, 8] <- NA
  } else {
    table3_children[2, 8] <- 
      round(svychisq(~ALL.public_level+group, design = svy_children_alldisease_2014)$p.value,2)
  }
## Summary stats for 2016

table3_children[rev(c(1,2)), 9] <- 
    round(svytable( ~group, design = svy_children_alldisease_2016)) # 'd_2016'
table3_children[rev(c(1,2)), 10] <- 
    round(coef(svyby(~ALL.public_level, by = ~group, design = svy_children_alldisease_2016, FUN = svytotal,keep.var = T, na.rm = T))) # 'n_2016'
table3_children[rev(c(1,2)), 11] <- 
    round(coef(svyby(~ALL.public_level, by = ~group, design = svy_children_alldisease_2016, FUN = svymean, keep.var = T, na.rm = T))*100,2) # 'm_2016'
table3_children[rev(c(1,2)), 12] <- 
    round(SE(svyby(~ALL.public_level, by = ~group, design = svy_children_alldisease_2016, FUN = svymean,keep.var = T, na.rm = T))*100,2) # 'se_2016'
table3_children[rev(c(1,2)), 13] <- table3_children[1, 11] - table3_children[2, 11]# 'dif_2016'

### Estimate p-value of the 2016 difference with a Chi2 test
  if (table3_children[1, 10] == 0 & 
      table3_children[2, 10] == 0) {
    table3_children[2, 14] <- NA
  } else {
    table3_children[2, 14] <- 
      round(svychisq(~ALL.public_level+group, design = svy_children_alldisease_2016)$p.value,2)
  }

## Estimate values for trends 2014-2016
  table3_children[rev(c(1, 2)), 15] <- 
    round((table3_children[rev(c(1, 2)), 11] - 
       table3_children[rev(c(1, 2)), 5]) / 
    table3_children[rev(c(1, 2)), 5]*100,2) # 'trends_rel'
  table3_children[rev(c(1, 2)), 16] <- 
    table3_children[rev(c(1, 2)), 11] - 
    table3_children[rev(c(1, 2)), 5] # 'trends_abs'

## DiD stats 
  #Unadj without balancing weight on wealth 
    #coef
  table3_children[2, 17] <- round(summary(did_children_unwtd_unadj)$coefficients[4, 1]*100,2)
    #P
  table3_children[2, 18] <- round(summary(did_children_unwtd_unadj)$coefficients[4, 4],2)
  #Unadj with balancing weight on wealth
    #coef
  table3_children[2, 19] <- round(summary(did_children_wtd_unadj)$coefficients[4, 1]*100,2)
    #P
  table3_children[2, 20] <- round(summary(did_children_wtd_unadj)$coefficients[4, 4],2)
  #Adj with balancing weight on wealth
    #coef
  table3_children[2, 21] <- round(summary(did_children_wtd)$coefficients[5, 1]*100,2)
    #P
  table3_children[2, 22] <- round(summary(did_children_wtd)$coefficients[5, 4],2)
```
### Final Supplemental Table 4 Children
```{r}
# Replicate final Table 2
final_table3_children<-data.frame(group=table3_children$group, d_2014=table3_children$d_2014, d_2016=table3_children$d_2016,trends_rel= table3_children$trends_rel,trends_abs= table3_children$trends_abs)
final_table3_children$m_2014 <- sprintf("%.2f (%.2f)", table3_children$m_2014, 
                                       table3_children$se_2014)
final_table3_children$m_2016 <- sprintf("%.2f (%.2f)", table3_children$m_2016, 
                                       table3_children$se_2016)
final_table3_children$dif_2014 <- ifelse(is.na(table3_children$`p-val_2014`),"--" ,sprintf("%.2f (%.2f)", table3_children$dif_2014,table3_children$`p-val_2014`))
final_table3_children$dif_2016 <- ifelse(is.na(table3_children$`p-val_2016`),"--" ,sprintf("%.2f (%.2f)", table3_children$dif_2016,table3_children$`p-val_2016`))



final_table3_children$`dif.in.dif(unadj_unwt)` <- ifelse(is.na(table3_children$`p-val_did_unadj_unwt`),"--" , 
                                                        sprintf("%.2f (%.2f)", 
                                                                  table3_children$`dif.in.dif(coef)_unadj_unwt`,
                                                                table3_children$`p-val_did_unadj_unwt`))
final_table3_children$`dif.in.dif(unadj_wt)` <- ifelse(is.na(table3_children$`p-val_did_unadj_wt`),"--" , 
                                                        sprintf("%.2f (%.2f)", 
                                                                table3_children$`dif.in.dif(coef)_unadj_wt`, 
                                                                table3_children$`p-val_did_unadj_wt`))
final_table3_children$`dif.in.dif(adj_wt)` <- ifelse(is.na(table3_children$`p-val_did_adj_wt`),"--" ,
                                                sprintf("%.2f (%.2f)", 
                                                  table3_children$`dif.in.dif(coef)_adj_wt`, 
                                                  table3_children$`p-val_did_adj_wt`))


final_table3_children <- as_tibble(final_table3_children) %>% 
  mutate(
    group = case_when(group == 1 ~ "IG", 
                      group == 0 ~ "NG")
   ) %>%
  rename("orig_DiD" = "dif.in.dif(unadj_unwt)","unadj_wt_DiD" = "dif.in.dif(unadj_wt)", "adj_wt_DiD" = "dif.in.dif(adj_wt)")
  
gt(final_table3_children ) %>%
  tab_header(title = "Table S4 (Balance Weighting). Summary statistics of responses and trends on any visits to a PHF or CHW for all children with symptoms of diarrhea, fever, and/or coughing with difficulty breathing from the 2014 and 2016 IHOPE surveys") %>%
  tab_style(style = cell_text(align = "left"), location = cells_title("title")) %>%
  tab_spanner(
    label = "2014",
    columns = c(d_2014, m_2014, dif_2014)
  ) %>%
  tab_spanner(
    label = "2016",
    columns = c(d_2016, m_2016, dif_2016)
  ) %>%
  tab_spanner(
    label = "2014-2016 Trend",
    columns = c(trends_rel, trends_abs, orig_DiD,unadj_wt_DiD,adj_wt_DiD)
  ) %>%
  tab_style(style = cell_text(weight = "bold"), 
            location = cells_column_spanners(spanners = everything())) %>%
    cols_label(group = "Study group", d_2014 = "N", m_2014 = "Percent (SE)", 
               dif_2014 = " Difference (p-value)", d_2016 = "N", 
               m_2016 = "Percent (SE)", dif_2016 = " Difference (p-value)", 
               trends_rel = "Percent relative change", 
               trends_abs = "Percent absolute change", orig_DiD="Original Unadjusted DiD (p value) ",unadj_wt_DiD="Unadjusted DiD with balance weighting (p value)", adj_wt_DiD="Adjusted DiD with balance weighting (p value)") %>%
    tab_style(style = cell_text(weight = "bold"), 
              location = cells_column_labels(columns = everything())) %>%
    gt::gtsave("table_fig/paper/Supplemental/table_s4_children_wt.png")

  

```
## Figure Children
```{r}
num_var <- 1

fig_children_wt <- as.data.frame(matrix(nrow = num_var * 4, ncol = 6))

###  UPDATE TABLE NAME AS NEEDED ###
fig_children_wt$V1 <- round(c(
  table3_children$m_2014, table3_children$m_2016), 2)
fig_children_wt$V2 <- round(c(
  table3_children$n_2014, table3_children$n_2016), 0)
fig_children_wt$V3 <- round(c(
  table3_children$d_2014, table3_children$d_2016), 0)
fig_children_wt$V4 <- rep(c("Intervention", "Non-Intervention"), num_var * 2)
fig_children_wt$V5 <- rep(c("2014 survey", "2016 survey"), each = num_var * 2)
fig_children_wt$V6 <- rep("Any Sick-Child Visits to a PHF or CHW", num_var * 4) ### RENAME BASED ON OUTCOME 

###
colnames(fig_children_wt) <- c("mean", "num", "dem", "group", "year", "indicator")
fig_children_wt$group <- factor(fig_children_wt$group, levels = c("Non-Intervention", "Intervention"))
fig_children_wt <- 
  ggplot(fig_children_wt, 
         aes(x = mean, y = group, group = interaction(group, indicator), 
             label = sprintf("%s/%s", num, dem))) +
  geom_point(aes(colour = year), size = 3.5) +
  scale_colour_manual(breaks = fig_children_wt$year, 
                      values = c("2014 survey" = "black", "2016 survey" = "red")) +
  geom_line() +
  labs(
    x = "Percentage", y = "",
    title = "Balancing Weight on Household Wealth Index: \n Any Sick-Child Visits to a PHF or CHW" ### UPDATE TITLE BASED ON OUTCOME ###
  ) +
  theme_bw() +
  scale_x_continuous(limits = c(0, 100), breaks = seq(0, 100, 10), expand = c(0, 0)) +
  scale_y_discrete(position = "right") +
  theme(
    legend.position = "bottom", axis.text = element_text(size = 10), 
    panel.grid.minor = element_blank(), legend.title = element_blank(),
    title = element_text(size = 12, face = "bold"), 
    axis.title = element_text(size = 13),
    legend.text = element_text(size = 12)
  )+
  annotate("text", label="}", x=65, y=1.5, size =20, fontface="plain")+
  annotate("text", label=paste0("Adjusted DiD (p-val): \n",final_table3_children$adj_wt_DiD[2]), x=80, y=1.5)
saveRDS(fig_children_wt,"table_fig/child/fig_child_wt.rds")
ggsave("table_fig/child/fig2children_wt.png")
```
